We removed presumably non-r1 derived cells.
urd_RL <- readRDS(file="./data/urd_RL.rds")
non_r1.cell.group <- c("Tlx3","Isl1","Is","10")
cell.removed <- cellsInCluster(urd_RL, "ident", non_r1.cell.group)
length(cell.removed)
[1] 136
cells.keep <- setdiff(colnames(urd@logupx.data), cell.removed)
length(cells.keep)
[1] 1818
urd.trimmed <- urdSubset(urd_RL, cells.keep=cells.keep)
plotDim(urd.trimmed, "ident",label.clusters = T, legend = F)
saveRDS(urd.trimmed, "./data/urd_trimmed.rds")
This step will take some time. We will skip this one and load the previously saved object.
urd <- readRDS("./data/urd_trimmed.rds")
# Define C1 cells as root cells
plotDimHighlight(urd, "ident", "C1", legend=F)
root.cells <- cellsInCluster(urd, "ident", c("C1"))
dm <- readRDS("./data/dm.s16_n75.rds")
flood.result <- readRDS(file="./data/flood-dm.s16-random200.rds")
urd <- importDM(urd, dm)
urd <- floodPseudotimeProcess(urd, flood.result, floods.name="pseudotime", max.frac.NA=0.4, pseudotime.fun=mean, stability.div=20)
### Inspect pseudotime
p0 <- pseudotimePlotStabilityOverall(urd) +
geom_hline(yintercept = 2.5, linetype="dashed", color = "red")
p1 <- plotDim(urd, "ident", legend=F, plot.title="Putative tip cells", alpha=1,label.clusters = T)
p2 <- plotDim(urd, "pseudotime", plot.title = "Pseudotime",legend=F)
print(plot_grid(p0,p1,p2, ncol = 3))
# Create a data.frame that includes pseudotime and stage information
gg.data <- cbind(urd@pseudotime, urd@meta[rownames(urd@pseudotime),])
gg.data$tip.Name <- urd@group.ids[rownames(gg.data),]$ident
# Plot pseudotime
ggplot(gg.data, aes(x=pseudotime, color=tip.Name, fill=tip.Name)) + geom_density(alpha=0.4) + theme_bw()+theme(legend.position="none")+
facet_wrap( ~ tip.Name, ncol=6)
ggplot(gg.data, aes(x=pseudotime, color=tip.Name, fill=tip.Name)) + geom_density(alpha=0.4) + theme_bw()+theme(legend.position="none")
# pass the cluster to claster name
urd@group.ids$tip.name <- NULL
urd@group.ids$tip.name[urd@group.ids$ident== "mCN"] <- "mCN"
urd@group.ids$tip.name[urd@group.ids$ident== "lCN"] <- "lCN"
urd@group.ids$tip.name[urd@group.ids$ident== "GC"] <- "GC"
urd@group.ids$class <- NULL
urd@group.ids$class[!is.na(urd@group.ids$tip.name)] <- "Tip"
urd@group.ids$tip.num <- NA
urd@group.ids$tip.num[urd@group.ids$ident== "mCN"] <- "3"
urd@group.ids$tip.num[urd@group.ids$ident== "lCN"] <- "2"
urd@group.ids$tip.num[urd@group.ids$ident== "GC"] <- "1"
saveRDS(urd, file="./data/urd.rds")
# Define parameters of logistic function to bias transition probabilities
diffusion.logistic <- pseudotimeDetermineLogistic(urd, "pseudotime", optimal.cells.forward=20, max.cells.back=40, pseudotime.direction="<", do.plot=T, print.values=T)
[1] "Mean pseudotime back (~40 cells) 0.0139819579440117"
[1] "Chance of accepted move to equal pseudotime is 0.82442639407355"
[1] "Mean pseudotime forward (~20 cells) -0.00695152465444183"
biased.tm <- pseudotimeWeightTransitionMatrix(urd, pseudotime = "pseudotime", logistic.params = diffusion.logistic, pseudotime.direction = "<")
# We ran this in the HPC cluster
# # # Simulate the biased random walks from each tip
# walks <- simulateRandomWalksFromTips(urd, tip.group.id = "tip.num", root.cells = root.cells,
# transition.matrix = biased.tm, n.per.tip = 25000, root.visits = 1,
# max.steps = 5000, verbose = T)
# saveRDS(walks, file = "./data/walks_n25000.rds")
# retrieve previously calculated random walks results
walks <- readRDS("./data/walks_n25000.rds")
# Process the biased random walks into visitation frequencies
urd <- processRandomWalksFromTips(urd, walks, verbose = T)
[1] "2019-01-22 13:51:08 - Processing walks from tip 1"
[1] "Calculating pseudotime with 2500 walks."
[1] "Calculating pseudotime with 5000 walks."
[1] "Calculating pseudotime with 7500 walks."
[1] "Calculating pseudotime with 10000 walks."
[1] "Calculating pseudotime with 12500 walks."
[1] "Calculating pseudotime with 15000 walks."
[1] "Calculating pseudotime with 17500 walks."
[1] "Calculating pseudotime with 20000 walks."
[1] "Calculating pseudotime with 22500 walks."
[1] "Calculating pseudotime with 25000 walks."
[1] "2019-01-22 13:51:13 - Processing walks from tip 2"
[1] "Calculating pseudotime with 2500 walks."
[1] "Calculating pseudotime with 5000 walks."
[1] "Calculating pseudotime with 7500 walks."
[1] "Calculating pseudotime with 10000 walks."
[1] "Calculating pseudotime with 12500 walks."
[1] "Calculating pseudotime with 15000 walks."
[1] "Calculating pseudotime with 17500 walks."
[1] "Calculating pseudotime with 20000 walks."
[1] "Calculating pseudotime with 22500 walks."
[1] "Calculating pseudotime with 25000 walks."
[1] "2019-01-22 13:51:19 - Processing walks from tip 3"
[1] "Calculating pseudotime with 2500 walks."
[1] "Calculating pseudotime with 5000 walks."
[1] "Calculating pseudotime with 7500 walks."
[1] "Calculating pseudotime with 10000 walks."
[1] "Calculating pseudotime with 12500 walks."
[1] "Calculating pseudotime with 15000 walks."
[1] "Calculating pseudotime with 17500 walks."
[1] "Calculating pseudotime with 20000 walks."
[1] "Calculating pseudotime with 22500 walks."
[1] "Calculating pseudotime with 25000 walks."
saveRDS(urd, file="./data/urd.rds")
gridExtra::grid.arrange(grobs=list(
plotDim(urd, "tip.name", plot.title="Cells in each tip"),
plotDim(urd, "visitfreq.log.1", transitions.plot=10000, plot.title="visitfreq_GC"),
plotDim(urd, "visitfreq.log.2", transitions.plot=10000, plot.title="visitfreq_mCN"),
plotDim(urd, "visitfreq.log.3", transitions.plot=10000, plot.title="visitfreq_lCN")
))
urd.tree <- loadTipCells(urd, "tip.num")
tip.id <- setdiff(unique(urd.tree@group.ids$tip.num),c(NA))
urd.tree <- buildTree(urd.tree, pseudotime = "pseudotime", tips.use = tip.id, divergence.method = "ks",
cells.per.pseudotime.bin = 30, bins.per.pseudotime.window = 8, minimum.visits = 10,
visit.threshold = 0.7, use.only.original.tips = T,
save.all.breakpoint.info = T, p.thresh=0.05)
[1] "Calculating divergence between 1 and 3 (Pseudotime 0 to 0.591)"
[1] "Calculating divergence between 1 and 2 (Pseudotime 0 to 0.591)"
[1] "Calculating divergence between 3 and 2 (Pseudotime 0 to 0.62)"
[1] "Joining segments 3 and 2 at pseudotime 0.573 to create segment 4"
[1] "Calculating divergence between 1 and 4 (Pseudotime 0 to 0.573)"
[1] "Joining segments 1 and 4 at pseudotime 0.462 to create segment 5"
[1] "Assigning cells to segments."
73 cells were not visited by a branch that exists at their pseudotime and were not assigned.
[1] "Collapsing short segments."
[1] "Removing singleton segments."
[1] "Reassigning cells to segments."
73 cells were not visited by a branch that exists at their pseudotime and were not assigned.
[1] "Assigning cells to nodes."
[1] "Laying out tree."
[1] "Adding cells to tree."
# Name the segments
urd.tree <- nameSegments(urd.tree, segments= tip.id,
segment.names = c("GC", "mCN","lCN"),
short.names = tip.id)
plotTree(urd.tree, "segment", title="URD tree segment", label.segments = T)
plotTree(urd.tree, "ident", title="URD cluster ID", label.segments = T)
gridExtra::grid.arrange(grobs=list(
plotTree(urd.tree, "Lmx1a", title="mCN marker_Lmx1a"),
plotTree(urd.tree, "Olig2", title="lCN marker_Olig2"),
plotTree(urd.tree, "Atoh1", title="GC marker_Atoh1"),
plotTree(urd.tree, "Wnt1", title="C1 marker_Wnt1")
))
saveRDS(urd.tree, file = "./data/urdTree.rds")
urd.tree <- readRDS("./data/urdTree.rds")
colnames(urd.tree@meta) <- c("n.Genes","n.Trans","orig.ident","percent.mito","tree.ident","clust")
tips.to.run <- setdiff(as.character(urd.tree@tree$segment.names), NA)
genes.use <- NULL # Calculate for all genes
# Calculate the markers of each other population.
gene.markers <- list()
markers.sum <- NULL
for (tip in tips.to.run) {
print(paste0(Sys.time(), ": ", tip))
markers <- aucprTestAlongTree(urd.tree, pseudotime = "pseudotime", tips = tip, log.effect.size = 0.4,
auc.factor = 1.25, max.auc.threshold = 0.85, frac.must.express = 0.1,
frac.min.diff = 0.1, genes.use = genes.use, root = NULL,
segs.to.skip = NULL, only.return.global = F, must.beat.sibs = 0.6,
report.debug = T)
gene.markers[[tip]] <- markers
}
saveRDS(gene.markers, "./data/geneMarkers.rds")
# Separate actual marker lists from the stats lists
gene.markers.de <- lapply(gene.markers, function(x) x[[1]])
gene.markers.stats <- lapply(gene.markers, function(x) x[[2]])
names(gene.markers.de) <- names(gene.markers)
names(gene.markers.stats) <- names(gene.markers)
# Compile all comparison stats into a single table
all.de.stats <- do.call("rbind", gene.markers.stats)
all.de.stats$tip <- substr(rownames(all.de.stats),1,nchar(rownames(all.de.stats))-2)
# Do a few plots
p1 <- ggplot(all.de.stats, aes(x=pt.1.mean, y=pt.2.mean)) + geom_point() + theme_bw() + geom_abline(slope = 1, intercept=0, col='red', lty=2) + labs(x="Mean Pseudotime (Group 1)", y="Mean Pseudotime (Group 2)")
p2 <- ggplot(all.de.stats, aes(x=genes.1.mean, y=genes.2.mean)) + geom_point() + theme_bw() + geom_abline(slope = 1, intercept=0, col='red', lty=2) + labs(x="Mean Detected Genes (Group 1)", y="Mean Detected Genes (Group 2)")
p3 <- ggplot(all.de.stats, aes(x=trans.1.mean, y=trans.2.mean)) + geom_point() + theme_bw() + geom_abline(slope = 1, intercept=0, col='red', lty=2) + labs(x="Mean Transcripts (Group 1)", y="Mean Transcripts (Group 2)")
plot_grid(p1,p2,p3, ncol = 3)
# Create a fold to hold the results
path <- ".data/"
dir.create(paste0(path,"cascades"))
# Generate impulse fits
gene.cascades <- lapply(tips.to.run, function(tip) {
print(paste0(Sys.time(), ": Impulse Fit ", tip))
seg.cells <- cellsAlongLineage(urd.tree, tip, remove.root=F)
casc <- geneCascadeProcess(object = urd.tree, pseudotime='pseudotime', cells = seg.cells, genes= rownames(gene.markers.de[[tip]]),
moving.window=3, cells.per.window=10, limit.single.sigmoid.slopes = "on", verbose = T)
tip.file.name <- gsub("/", "_", tip)
saveRDS(casc, file=paste0(path, "cascades/casc_", tip.file.name, ".rds"))
return(casc)
})
names(gene.cascades) <- tips.to.run
# Make a heatmap of every cascade in a single PDF.
for (tip in tips.to.run) {
gene.num <- nrow(gene.cascades[[tip]]$scaled.expression)
geneCascadeHeatmap(cascade=gene.cascades[[tip]], title = tip, row.font.size = 0.06*gene.num)
}
# =====repeat with TF genes only ============
# Identify DE genes of each other population (restricted to transcription factors only)
mGenes <- readRDS("/Volumes/jali/Genome/Annotation1.rds")
TF.use <- intersect(rownames(urd@logupx.data),mGenes[which(mGenes$Type > "nonTF"), "mgi_symbol"]);length(TF.use)
TF.markers <- list()
for (tipn in 1:length(tips.to.run)) {
tip <- tips.to.run[tipn]
print(paste0(Sys.time(), ": ", tip))
markers <- aucprTestAlongTree(urd.tree, pseudotime = "pseudotime", tips = tip, log.effect.size = 0.25,
auc.factor = 1.25, max.auc.threshold = 0.85, frac.must.express = 0.1,
frac.min.diff = 0.1, genes.use = TF.use, root = NULL,
segs.to.skip = NULL, only.return.global = F, must.beat.sibs = 0.6,
report.debug = T)
TF.markers[[tip]] <- markers
}
saveRDS(TF.markers, "./data/geneMarkers_TF.rds")
# ====== Generate impulse fits for DE transcription factors ======
TF.markers.de <- lapply(TF.markers, function(x) x[[1]])
names(TF.markers.de) <- names(TF.markers)
TF.cascades <- lapply(tips.to.run, function(tip) {
print(paste0(Sys.time(), ": Impulse Fit ", tip))
seg.cells <- cellsAlongLineage(urd.tree, tip, remove.root=F)
casc <- geneCascadeProcess(object = urd.tree, pseudotime='pseudotime', cells = seg.cells, genes= rownames(TF.markers.de[[tip]]),
# background.genes = sample(setdiff(rownames(urd.tree@logupx.data),urd.tree@var.genes), 1000),
moving.window=3, cells.per.window=10, limit.single.sigmoid.slopes = "on", verbose = T)
return(casc)
})
names(TF.cascades) <- tips.to.run
saveRDS(TF.cascades, "./data/TF.cascades.rds")
# Make a heatmap of every cascade in a single PDF.
pdf(file=paste0(path,"cascades/cascades_TF.pdf"), width=5, height=6)
for (tip in tips.to.run) {
gene.num <- nrow(TF.cascades[[tip]]$scaled.expression)
geneCascadeHeatmap(cascade=TF.cascades[[tip]], title = tip, row.font.size = 0.6*gene.num)
}
dev.off()
# create a table of markers genes with timing
markers.sum <- NULL
for (tip in tips.to.run) {
res <- gene.markers.de[[tip]]
res$tip <- tip
cascade=gene.cascades[[tip]]
# Correct for NA timings
timing <- cascade$timing
timing[intersect(which(is.na(timing$time.on)), which(is.infinite(timing$time.off))), "time.on"] <- Inf
res <- cbind(res,timing)
gene.order <- order(timing$time.on, timing$time.off, na.last=F)
res <- res[gene.order,]
res$gene <- rownames(res)
res$TF <- mGenes$Type[match(res$gene,mGenes$mgi_symbol)]
res$description <- mGenes$description[match(res$gene,mGenes$mgi_symbol)]
markers.sum <- rbind(markers.sum, res)
}
head(markers.sum)
### ====== Identify markers of each lineage ======
markers.sum <- NULL
tip <- "GC"
tip.file.name <- gsub("/", "_", tip)
cascade <- readRDS(file = paste0("./cascades/casc_", tip.file.name, ".rds"))
markers <- rownames(cascade$scaled.expression)
# Determine which genes are also global markers
cells <- cellsInCluster(urd.tree, "segment", c("1"))
markers.global <- markersAUCPR(urd.tree, cells.1 = cells, cells.2 = NULL, genes.use = markers, clustering = "segment")
marker.thresh <- aucprThreshold(cells.1 = cells, cells.2 = setdiff(unlist(urd.tree@tree$cells.in.segment), cells), factor = 2.5, max.auc = Inf) # lower the stringency by reducing the factor
de.markers <- markers.global[markers.global$AUCPR >= marker.thresh,];nrow(de.markers)
de.markers$tip <- tip
de.markers$gene <- rownames(de.markers)
de.markers$TF <- mGenes$Type[match(de.markers$gene,mGenes$mgi_symbol)]
de.markers$description <- mGenes$description[match(de.markers$gene,mGenes$mgi_symbol)]
markers.sum <- rbind(markers.sum, de.markers)
# find marker of caudal thalamus
tip <- "lCN"
tip.file.name <- gsub("/", "_", tip)
cascade <- readRDS(file = paste0("./cascades/casc_", tip.file.name, ".rds"))
markers <- rownames(cascade$scaled.expression)
cells <- cellsInCluster(urd.tree, "segment", c("2"))
markers.global <- markersAUCPR(urd.tree, cells.1 = cells, genes.use = markers, clustering = "segment")
marker.thresh <- aucprThreshold(cells.1 = cells, cells.2 = setdiff(unlist(urd.tree@tree$cells.in.segment), cells), factor = 2.5, max.auc = Inf)
de.markers <- markers.global[markers.global$AUCPR >= marker.thresh,];nrow(de.markers)
de.markers$tip <- tip
de.markers$gene <- rownames(de.markers)
de.markers$TF <- mGenes$Type[match(de.markers$gene,mGenes$mgi_symbol)]
de.markers$description <- mGenes$description[match(de.markers$gene,mGenes$mgi_symbol)]
markers.sum <- rbind(markers.sum, de.markers)
# find marker of caudal thalamus
tip <- "mCN"
tip.file.name <- gsub("/", "_", tip)
cascade <- readRDS(file = paste0("./cascades/casc_", tip.file.name, ".rds"))
markers <- rownames(cascade$scaled.expression)
cells <- cellsInCluster(urd.tree, "segment", c("3"))
markers.global <- markersAUCPR(urd.tree, cells.1 = cells, cells.2 = NULL, genes.use = markers, clustering = "segment")
marker.thresh <- aucprThreshold(cells.1 = cells, cells.2 = setdiff(unlist(urd.tree@tree$cells.in.segment), cells),
factor = 2.5, max.auc = Inf) # lower the stringency by reducing the factor
de.markers <- markers.global[markers.global$AUCPR >= marker.thresh,];nrow(de.markers)
de.markers$tip <- tip
de.markers$gene <- rownames(de.markers)
de.markers$TF <- mGenes$Type[match(de.markers$gene,mGenes$mgi_symbol)]
de.markers$description <- mGenes$description[match(de.markers$gene,mGenes$mgi_symbol)]
markers.sum <- rbind(markers.sum, de.markers)
# openxlsx::write.xlsx(markers.sum, file = "./tables/segmantMarkers_all.xlsx")
openxlsx::write.xlsx(markers.sum, file = "./tables/lineageMarkers_all.xlsx")
sessionInfo()
R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: OS X El Capitan 10.11.6
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] bindrcpp_0.2.2 Seurat_2.3.3 cowplot_0.9.2 URD_1.0.2 Matrix_1.2-14 ggplot2_3.1.0.9000
loaded via a namespace (and not attached):
[1] readxl_1.1.0 snow_0.4-2 backports_1.1.2 Hmisc_4.1-1 RcppEigen_0.3.3.5.0 plyr_1.8.4 igraph_1.2.2
[8] lazyeval_0.2.1 sp_1.2-7 splines_3.4.3 digest_0.6.18 foreach_1.4.4 htmltools_0.3.6 viridis_0.5.1
[15] lars_1.2 gdata_2.18.0 magrittr_1.5 checkmate_1.8.5 cluster_2.0.7-1 mixtools_1.1.0 ROCR_1.0-7
[22] openxlsx_4.0.17 gmodels_2.18.1 R.utils_2.6.0 xts_0.10-2 colorspace_1.3-2 ggrepel_0.8.0 haven_1.1.1
[29] dplyr_0.7.8 jsonlite_1.5 lme4_1.1-17 bindr_0.1.1 ape_5.1 survival_2.42-3 zoo_1.8-1
[36] iterators_1.0.10 glue_1.3.0 polyclip_1.9-1 gtable_0.2.0 MatrixModels_0.4-1 car_3.0-0 kernlab_0.9-26
[43] prabclus_2.2-6 BiocGenerics_0.24.0 DEoptimR_1.0-8 abind_1.4-5 SparseM_1.77 VIM_4.7.0 scales_1.0.0.9000
[50] mvtnorm_1.0-8 Rcpp_1.0.0 dtw_1.20-1 metap_0.9 viridisLite_0.3.0 laeken_0.4.6 htmlTable_1.11.2
[57] reticulate_1.8 bit_1.1-12 foreign_0.8-70 proxy_0.4-22 mclust_5.4 SDMTools_1.1-221 Formula_1.2-3
[64] tsne_0.1-3 stats4_3.4.3 vcd_1.4-4 htmlwidgets_1.2 gplots_3.0.1 RColorBrewer_1.1-2 fpc_2.1-11
[71] acepack_1.4.1 modeltools_0.2-21 ica_1.0-2 pkgconfig_2.0.2 R.methodsS3_1.7.1 flexmix_2.3-14 farver_1.1.0
[78] nnet_7.3-12 deldir_0.1-15 labeling_0.3 tidyselect_0.2.5 rlang_0.3.0.9002 reshape2_1.4.3 munsell_0.5.0
[85] cellranger_1.1.0 tools_3.4.3 ggridges_0.5.0 evaluate_0.10.1 stringr_1.3.0 yaml_2.1.19 bit64_0.9-7
[92] knitr_1.20 fitdistrplus_1.0-9 robustbase_0.93-0 caTools_1.17.1.1 purrr_0.2.5 RANN_2.6 ggraph_1.0.2
[99] pbapply_1.3-4 nlme_3.1-137 R.oo_1.22.0 hdf5r_1.0.0 concaveman_1.0.0 compiler_3.4.3 rstudioapi_0.7
[106] png_0.1-7 curl_3.2 e1071_1.6-8 smoother_1.1 tibble_1.4.2 tweenr_1.0.1 stringi_1.2.4
[113] forcats_0.3.0 lattice_0.20-35 trimcluster_0.1-2 nloptr_1.0.4 diffusionMap_1.1-0 pillar_1.2.2 lmtest_0.9-36
[120] irlba_2.3.2 data.table_1.11.8 bitops_1.0-6 R6_2.3.0 latticeExtra_0.6-28 KernSmooth_2.23-15 gridExtra_2.3
[127] rio_0.5.10 codetools_0.2-15 boot_1.3-20 MASS_7.3-50 gtools_3.5.0 assertthat_0.2.0 destiny_2.6.2
[134] rprojroot_1.3-2 minpack.lm_1.2-1 withr_2.1.2 diptest_0.75-7 parallel_3.4.3 doSNOW_1.0.16 grid_3.4.3
[141] rpart_4.1-13 tidyr_0.8.2 class_7.3-14 minqa_1.2.4 rmarkdown_1.9 segmented_0.5-3.0 carData_3.0-1
[148] Rtsne_0.15 TTR_0.23-3 ggforce_0.1.1 scatterplot3d_0.3-41 Biobase_2.38.0 base64enc_0.1-3